import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
df=pd.read_csv(r"C:\Users\priya\Downloads\Telegram Desktop\911.csv")
df
| lat | lng | desc | zip | title | timeStamp | twp | addr | e | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 40.297876 | -75.581294 | REINDEER CT & DEAD END; NEW HANOVER; Station ... | 19525.0 | EMS: BACK PAINS/INJURY | 2015-12-10 17:40:00 | NEW HANOVER | REINDEER CT & DEAD END | 1 |
| 1 | 40.258061 | -75.264680 | BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... | 19446.0 | EMS: DIABETIC EMERGENCY | 2015-12-10 17:40:00 | HATFIELD TOWNSHIP | BRIAR PATH & WHITEMARSH LN | 1 |
| 2 | 40.121182 | -75.351975 | HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... | 19401.0 | Fire: GAS-ODOR/LEAK | 2015-12-10 17:40:00 | NORRISTOWN | HAWS AVE | 1 |
| 3 | 40.116153 | -75.343513 | AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... | 19401.0 | EMS: CARDIAC EMERGENCY | 2015-12-10 17:40:01 | NORRISTOWN | AIRY ST & SWEDE ST | 1 |
| 4 | 40.251492 | -75.603350 | CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... | NaN | EMS: DIZZINESS | 2015-12-10 17:40:01 | LOWER POTTSGROVE | CHERRYWOOD CT & DEAD END | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99487 | 40.132869 | -75.333515 | MARKLEY ST & W LOGAN ST; NORRISTOWN; 2016-08-2... | 19401.0 | Traffic: VEHICLE ACCIDENT - | 2016-08-24 11:06:00 | NORRISTOWN | MARKLEY ST & W LOGAN ST | 1 |
| 99488 | 40.006974 | -75.289080 | LANCASTER AVE & RITTENHOUSE PL; LOWER MERION; ... | 19003.0 | Traffic: VEHICLE ACCIDENT - | 2016-08-24 11:07:02 | LOWER MERION | LANCASTER AVE & RITTENHOUSE PL | 1 |
| 99489 | 40.115429 | -75.334679 | CHESTNUT ST & WALNUT ST; NORRISTOWN; Station ... | 19401.0 | EMS: FALL VICTIM | 2016-08-24 11:12:00 | NORRISTOWN | CHESTNUT ST & WALNUT ST | 1 |
| 99490 | 40.186431 | -75.192555 | WELSH RD & WEBSTER LN; HORSHAM; Station 352; ... | 19002.0 | EMS: NAUSEA/VOMITING | 2016-08-24 11:17:01 | HORSHAM | WELSH RD & WEBSTER LN | 1 |
| 99491 | 40.207055 | -75.317952 | MORRIS RD & S BROAD ST; UPPER GWYNEDD; 2016-08... | 19446.0 | Traffic: VEHICLE ACCIDENT - | 2016-08-24 11:17:02 | UPPER GWYNEDD | MORRIS RD & S BROAD ST | 1 |
99492 rows × 9 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 99492 entries, 0 to 99491 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 lat 99492 non-null float64 1 lng 99492 non-null float64 2 desc 99492 non-null object 3 zip 86637 non-null float64 4 title 99492 non-null object 5 timeStamp 99492 non-null object 6 twp 99449 non-null object 7 addr 98973 non-null object 8 e 99492 non-null int64 dtypes: float64(3), int64(1), object(5) memory usage: 6.8+ MB
df.head()
| lat | lng | desc | zip | title | timeStamp | twp | addr | e | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 40.297876 | -75.581294 | REINDEER CT & DEAD END; NEW HANOVER; Station ... | 19525.0 | EMS: BACK PAINS/INJURY | 2015-12-10 17:40:00 | NEW HANOVER | REINDEER CT & DEAD END | 1 |
| 1 | 40.258061 | -75.264680 | BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... | 19446.0 | EMS: DIABETIC EMERGENCY | 2015-12-10 17:40:00 | HATFIELD TOWNSHIP | BRIAR PATH & WHITEMARSH LN | 1 |
| 2 | 40.121182 | -75.351975 | HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... | 19401.0 | Fire: GAS-ODOR/LEAK | 2015-12-10 17:40:00 | NORRISTOWN | HAWS AVE | 1 |
| 3 | 40.116153 | -75.343513 | AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... | 19401.0 | EMS: CARDIAC EMERGENCY | 2015-12-10 17:40:01 | NORRISTOWN | AIRY ST & SWEDE ST | 1 |
| 4 | 40.251492 | -75.603350 | CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... | NaN | EMS: DIZZINESS | 2015-12-10 17:40:01 | LOWER POTTSGROVE | CHERRYWOOD CT & DEAD END | 1 |
zip_column = 'zip'
top_zipcodes = df[zip_column].value_counts().head(5)
"Top 5 zip codes for 911 calls:"
top_zipcodes
19401.0 6979 19464.0 6643 19403.0 4854 19446.0 4748 19406.0 3174 Name: zip, dtype: int64
twp_column = 'twp'
top_townships = df[twp_column].value_counts().head(5)
"Top 5 townships for 911 calls:"
top_townships
LOWER MERION 8443 ABINGTON 5977 NORRISTOWN 5890 UPPER MERION 5227 CHELTENHAM 4575 Name: twp, dtype: int64
title_column = 'title'
unique_title_codes = df[title_column].nunique()
"Number of unique title codes:", unique_title_codes
('Number of unique title codes:', 110)
df['Reasons'] = df[title_column].apply(lambda title: title.split(':')[0].strip())
df
| lat | lng | desc | zip | title | timeStamp | twp | addr | e | Reasons | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 40.297876 | -75.581294 | REINDEER CT & DEAD END; NEW HANOVER; Station ... | 19525.0 | EMS: BACK PAINS/INJURY | 2015-12-10 17:40:00 | NEW HANOVER | REINDEER CT & DEAD END | 1 | EMS |
| 1 | 40.258061 | -75.264680 | BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... | 19446.0 | EMS: DIABETIC EMERGENCY | 2015-12-10 17:40:00 | HATFIELD TOWNSHIP | BRIAR PATH & WHITEMARSH LN | 1 | EMS |
| 2 | 40.121182 | -75.351975 | HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... | 19401.0 | Fire: GAS-ODOR/LEAK | 2015-12-10 17:40:00 | NORRISTOWN | HAWS AVE | 1 | Fire |
| 3 | 40.116153 | -75.343513 | AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... | 19401.0 | EMS: CARDIAC EMERGENCY | 2015-12-10 17:40:01 | NORRISTOWN | AIRY ST & SWEDE ST | 1 | EMS |
| 4 | 40.251492 | -75.603350 | CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... | NaN | EMS: DIZZINESS | 2015-12-10 17:40:01 | LOWER POTTSGROVE | CHERRYWOOD CT & DEAD END | 1 | EMS |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99487 | 40.132869 | -75.333515 | MARKLEY ST & W LOGAN ST; NORRISTOWN; 2016-08-2... | 19401.0 | Traffic: VEHICLE ACCIDENT - | 2016-08-24 11:06:00 | NORRISTOWN | MARKLEY ST & W LOGAN ST | 1 | Traffic |
| 99488 | 40.006974 | -75.289080 | LANCASTER AVE & RITTENHOUSE PL; LOWER MERION; ... | 19003.0 | Traffic: VEHICLE ACCIDENT - | 2016-08-24 11:07:02 | LOWER MERION | LANCASTER AVE & RITTENHOUSE PL | 1 | Traffic |
| 99489 | 40.115429 | -75.334679 | CHESTNUT ST & WALNUT ST; NORRISTOWN; Station ... | 19401.0 | EMS: FALL VICTIM | 2016-08-24 11:12:00 | NORRISTOWN | CHESTNUT ST & WALNUT ST | 1 | EMS |
| 99490 | 40.186431 | -75.192555 | WELSH RD & WEBSTER LN; HORSHAM; Station 352; ... | 19002.0 | EMS: NAUSEA/VOMITING | 2016-08-24 11:17:01 | HORSHAM | WELSH RD & WEBSTER LN | 1 | EMS |
| 99491 | 40.207055 | -75.317952 | MORRIS RD & S BROAD ST; UPPER GWYNEDD; 2016-08... | 19446.0 | Traffic: VEHICLE ACCIDENT - | 2016-08-24 11:17:02 | UPPER GWYNEDD | MORRIS RD & S BROAD ST | 1 | Traffic |
99492 rows × 10 columns
df.columns
Index(['lat', 'lng', 'desc', 'zip', 'title', 'timeStamp', 'twp', 'addr', 'e',
'Reasons'],
dtype='object')
reasons_column = 'Reasons'
reasons_counts = df[reasons_column].value_counts()
reasons_counts
EMS 48877 Traffic 35695 Fire 14920 Name: Reasons, dtype: int64
plt.figure(figsize=(8, 6))
plt.title('911 Calls by Reasons')
sns.countplot(x=reasons_column, data=df, palette='viridis')
<Axes: title={'center': '911 Calls by Reasons'}, xlabel='Reasons', ylabel='count'>
type(df['timeStamp'].iloc[0])
str
df['timeStamp'] = pd.to_datetime(df['timeStamp'], format='%Y-%m-%d %H:%M:%S')
time = df['timeStamp'].iloc[0]
time.hour
17
df['Date'] = df['timeStamp'].dt.to_period('M')
month_names = {
1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr',
5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug',
9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}
"Data type of 'timeStamp' column after conversion:", df['timeStamp'].iloc[0]
("Data type of 'timeStamp' column after conversion:",
Timestamp('2015-12-10 17:40:00'))
df['Hour'] = df['timeStamp'].apply(lambda time: time.hour)
df['Month'] = df['timeStamp'].apply(lambda time: time.month)
df['Day of Week'] = df['timeStamp'].apply(lambda time: time.dayofweek)
df[['Hour', 'Month', 'Day of Week']].head()
| Hour | Month | Day of Week | |
|---|---|---|---|
| 0 | 17 | 12 | 3 |
| 1 | 17 | 12 | 3 |
| 2 | 17 | 12 | 3 |
| 3 | 17 | 12 | 3 |
| 4 | 17 | 12 | 3 |
dmap = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}
df['Day of Week'] = df['Day of Week'].map(dmap)
df[['Hour', 'Month', 'Day of Week']].head()
| Hour | Month | Day of Week | |
|---|---|---|---|
| 0 | 17 | 12 | Thu |
| 1 | 17 | 12 | Thu |
| 2 | 17 | 12 | Thu |
| 3 | 17 | 12 | Thu |
| 4 | 17 | 12 | Thu |
plt.figure(figsize=(10, 6))
sns.countplot(x='Day of Week', data=df, hue='Reasons')
plt.title('911 Calls by Day of Week (Reasons-wise)')
plt.legend(loc='upper right', bbox_to_anchor=(1.15, 1),borderaxespad=0)
<matplotlib.legend.Legend at 0x230627ae8d0>
plt.figure(figsize=(10, 6))
sns.countplot(x='Month', data=df, hue='Reasons')
plt.title('911 Calls by Day of Week (Reasons-wise)')
plt.legend(loc='upper right', bbox_to_anchor=(1.15, 1),borderaxespad=0)
<matplotlib.legend.Legend at 0x2305f945c50>
byMonth = df.groupby('Month').count()
byMonth.head()
| lat | lng | desc | zip | title | timeStamp | twp | addr | e | Reasons | Date | Hour | Day of Week | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Month | |||||||||||||
| 1 | 13205 | 13205 | 13205 | 11527 | 13205 | 13205 | 13203 | 13096 | 13205 | 13205 | 13205 | 13205 | 13205 |
| 2 | 11467 | 11467 | 11467 | 9930 | 11467 | 11467 | 11465 | 11396 | 11467 | 11467 | 11467 | 11467 | 11467 |
| 3 | 11101 | 11101 | 11101 | 9755 | 11101 | 11101 | 11092 | 11059 | 11101 | 11101 | 11101 | 11101 | 11101 |
| 4 | 11326 | 11326 | 11326 | 9895 | 11326 | 11326 | 11323 | 11283 | 11326 | 11326 | 11326 | 11326 | 11326 |
| 5 | 11423 | 11423 | 11423 | 9946 | 11423 | 11423 | 11420 | 11378 | 11423 | 11423 | 11423 | 11423 | 11423 |
plt.figure(figsize=(10, 6))
byMonth['twp'].plot(kind='bar')
plt.title('911 Calls by Month')
Text(0.5, 1.0, '911 Calls by Month')
plt.figure(figsize=(10, 6))
byMonth['twp'].plot(kind='line')
plt.grid(True)
plt.title('911 Calls by Month')
Text(0.5, 1.0, '911 Calls by Month')
byMonth = byMonth.reset_index()
plt.figure(figsize=(10, 6))
sns.lmplot(x='Month', y='twp', data=byMonth)
plt.grid(True)
plt.title('Linear Fit of 911 Calls by Month')
Text(0.5, 1.0, 'Linear Fit of 911 Calls by Month')
<Figure size 1000x600 with 0 Axes>
df['Date'] = df['timeStamp'].apply(lambda x:x.date())
df['Date'].head()
0 2015-12-10 1 2015-12-10 2 2015-12-10 3 2015-12-10 4 2015-12-10 Name: Date, dtype: object
plt.figure(figsize=(10, 6))
Date = df.groupby('Date').count()['twp'].plot()
plt.tight_layout()
plt.grid(True)
plt.title('911 Calls per Date')
Text(0.5, 1.0, '911 Calls per Date')
plt.figure(figsize=(10, 6))
df_traffic = df[df['Reasons'] == 'Traffic']
df_traffic.groupby('Date').count()['twp'].plot()
plt.tight_layout()
plt.grid(True)
plt.title('911 Calls Due to Traffic')
Text(0.5, 1.0, '911 Calls Due to Traffic')
plt.figure(figsize=(10, 6))
df_traffic = df[df['Reasons'] == 'Fire']
df_traffic.groupby('Date').count()['twp'].plot()
plt.tight_layout()
plt.grid(True)
plt.title('911 Calls Due to Fire')
Text(0.5, 1.0, '911 Calls Due to Fire')
plt.figure(figsize=(10, 6))
df_traffic = df[df['Reasons'] == 'EMS']
df_traffic.groupby('Date').count()['twp'].plot()
plt.tight_layout()
plt.grid(True)
plt.title('911 Calls Due to EMS')
Text(0.5, 1.0, '911 Calls Due to EMS')
dayHour = df.groupby(['Day of Week','Hour']).count().unstack()['Reasons']
dayHour.head()
| Hour | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Day of Week | |||||||||||||||||||||
| Fri | 275 | 235 | 191 | 175 | 201 | 194 | 372 | 598 | 742 | 752 | ... | 932 | 980 | 1039 | 980 | 820 | 696 | 667 | 559 | 514 | 474 |
| Mon | 282 | 221 | 201 | 194 | 204 | 267 | 397 | 653 | 819 | 786 | ... | 869 | 913 | 989 | 997 | 885 | 746 | 613 | 497 | 472 | 325 |
| Sat | 375 | 301 | 263 | 260 | 224 | 231 | 257 | 391 | 459 | 640 | ... | 789 | 796 | 848 | 757 | 778 | 696 | 628 | 572 | 506 | 467 |
| Sun | 383 | 306 | 286 | 268 | 242 | 240 | 300 | 402 | 483 | 620 | ... | 684 | 691 | 663 | 714 | 670 | 655 | 537 | 461 | 415 | 330 |
| Thu | 278 | 202 | 233 | 159 | 182 | 203 | 362 | 570 | 777 | 828 | ... | 876 | 969 | 935 | 1013 | 810 | 698 | 617 | 553 | 424 | 354 |
5 rows × 24 columns
plt.figure(figsize=(10, 6))
sns.heatmap(dayHour,cmap='viridis')
plt.tight_layout()
plt.title('911 Calls as per Hours in a week')
Text(0.5, 1.0, '911 Calls as per Hours in a week')
plt.figure(figsize=(10,6))
sns.clustermap(dayHour, cmap='viridis')
plt.tight_layout()
plt.title('Cluster map_911 Calls as per Hours in a week')
Text(0.5, 1.0, 'Cluster map_911 Calls as per Hours in a week')
<Figure size 1000x600 with 0 Axes>
dayMonth = df.groupby(['Day of Week','Month']).count().unstack()['Reasons']
dayMonth.head()
| Month | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 |
|---|---|---|---|---|---|---|---|---|---|
| Day of Week | |||||||||
| Fri | 1970 | 1581 | 1525 | 1958 | 1730 | 1649 | 2045 | 1310 | 1065 |
| Mon | 1727 | 1964 | 1535 | 1598 | 1779 | 1617 | 1692 | 1511 | 1257 |
| Sat | 2291 | 1441 | 1266 | 1734 | 1444 | 1388 | 1695 | 1099 | 978 |
| Sun | 1960 | 1229 | 1102 | 1488 | 1424 | 1333 | 1672 | 1021 | 907 |
| Thu | 1584 | 1596 | 1900 | 1601 | 1590 | 2065 | 1646 | 1230 | 1266 |
plt.figure(figsize=(10,6))
sns.heatmap(dayMonth,cmap='viridis')
plt.tight_layout()
plt.title('911 Calls as per Months')
Text(0.5, 1.0, '911 Calls as per Months')
plt.figure(figsize=(10,6))
sns.clustermap(dayMonth, cmap='viridis')
plt.tight_layout()
plt.title('Cluster Map_911 Calls as per Months')
Text(0.5, 1.0, 'Cluster Map_911 Calls as per Months')
<Figure size 1000x600 with 0 Axes>
### percentage of 911 calls for each reason
total_calls = len(df)
total_calls
99492
reasons_counts = df['Reasons'].value_counts()
percentage_by_reason = (reasons_counts / total_calls * 100).round(2)
percentage_by_reason
EMS 49.13 Traffic 35.88 Fire 15.00 Name: Reasons, dtype: float64
plt.figure(figsize=(6, 4))
plt.pie(percentage_by_reason, labels=percentage_by_reason.index, autopct='%1.1f%%', startangle=140)
plt.title('Percentage of 911 Calls by Reasons')
plt.axis('equal')
plt.show()
# correaltion between numeric columns in df dataframe
numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
numeric_columns
Index(['lat', 'lng', 'zip', 'e', 'Hour', 'Month'], dtype='object')
correlation_matrix = df[numeric_columns].corr()
df[numeric_columns]
| lat | lng | zip | e | Hour | Month | |
|---|---|---|---|---|---|---|
| 0 | 40.297876 | -75.581294 | 19525.0 | 1 | 17 | 12 |
| 1 | 40.258061 | -75.264680 | 19446.0 | 1 | 17 | 12 |
| 2 | 40.121182 | -75.351975 | 19401.0 | 1 | 17 | 12 |
| 3 | 40.116153 | -75.343513 | 19401.0 | 1 | 17 | 12 |
| 4 | 40.251492 | -75.603350 | NaN | 1 | 17 | 12 |
| ... | ... | ... | ... | ... | ... | ... |
| 99487 | 40.132869 | -75.333515 | 19401.0 | 1 | 11 | 8 |
| 99488 | 40.006974 | -75.289080 | 19003.0 | 1 | 11 | 8 |
| 99489 | 40.115429 | -75.334679 | 19401.0 | 1 | 11 | 8 |
| 99490 | 40.186431 | -75.192555 | 19002.0 | 1 | 11 | 8 |
| 99491 | 40.207055 | -75.317952 | 19446.0 | 1 | 11 | 8 |
99492 rows × 6 columns
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
plt.title('Correlation Heatmap')
plt.show()
Calculating and visualising the response times of 911 emergency services can analyse how response times vary based on time of day, day of the week, or type of reason. This also helps us understand the minimum response time of emergency services in critical situations.
df.columns
Index(['lat', 'lng', 'desc', 'zip', 'title', 'timeStamp', 'twp', 'addr', 'e',
'Reasons', 'Date', 'Hour', 'Month', 'Day of Week'],
dtype='object')
df['timeStamp'] = pd.to_datetime(df['timeStamp'])
df['Hour'] = df['timeStamp'].dt.hour
df['Minute'] = df['timeStamp'].dt.minute
df['Second'] = df['timeStamp'].dt.second
df['ResponseTime'] = (df['timeStamp'] - df['timeStamp'].dt.normalize() +
pd.to_timedelta(df['Hour'], unit='h') +
pd.to_timedelta(df['Minute'], unit='m') +
pd.to_timedelta(df['Second'], unit='s')).dt.total_seconds() / 60
avg_response_time = df['ResponseTime'].mean()
median_response_time = df['ResponseTime'].median()
f"Average Response Time: {avg_response_time:.1f} min"
'Average Response Time: 1632.7 min'
f"Median Response Time: {median_response_time:.1f} min"
'Median Response Time: 1684.0 min'
###histogram to visualize the distribution of response times
plt.figure(figsize=(10, 6))
plt.hist(df['ResponseTime'], bins=50, color='b', edgecolor='black')
plt.title('Distribution of Response Times')
plt.xlabel('Response Time (minutes)')
plt.ylabel('Frequency')
plt.grid(True)
plt.tight_layout()
plt.show()
###average response time per day
avg_response_by_day = df.groupby('Day of Week')['ResponseTime'].mean().round(2)
avg_response_by_day
Day of Week Fri 1660.40 Mon 1626.55 Sat 1645.66 Sun 1585.38 Thu 1639.49 Tue 1626.79 Wed 1637.54 Name: ResponseTime, dtype: float64
avg_response_by_day
Day of Week Fri 1660.40 Mon 1626.55 Sat 1645.66 Sun 1585.38 Thu 1639.49 Tue 1626.79 Wed 1637.54 Name: ResponseTime, dtype: float64
plt.figure(figsize=(8, 6))
sns.pointplot(x=avg_response_by_day.index, y=avg_response_by_day.values)
plt.xlabel('Day of the Week')
plt.ylabel('Average Response Time')
plt.title('Average Response Time by Day of the Week')
plt.xticks(rotation=45, ha='right')
plt.grid(True)
plt.tight_layout()
plt.show()
avg_response_by_day_minutes = df.groupby('Day of Week')['Minute'].mean().round(2)
avg_response_by_day_minutes
Day of Week Fri 29.22 Mon 29.08 Sat 29.59 Sun 29.46 Thu 29.54 Tue 29.10 Wed 29.24 Name: Minute, dtype: float64
plt.figure(figsize=(8, 6))
sns.lineplot(x=avg_response_by_day_minutes.index, y=avg_response_by_day_minutes.values, marker='o')
plt.xlabel('Day of the Week')
plt.ylabel('Average Response Time (Minutes)')
plt.title('Average Response Time by Day of the Week (Minutes)')
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()
###Response Time Patterns by Hour
avg_response_time_by_hour = df.groupby('Hour')['ResponseTime'].mean().round(2)
avg_response_time_by_hour
Hour 0 57.84 1 176.90 2 297.31 3 418.98 4 540.05 5 661.51 6 782.51 7 903.00 8 1019.67 9 1139.01 10 1259.67 11 1379.83 12 1498.94 13 1618.20 14 1738.29 15 1859.36 16 1978.33 17 2099.77 18 2215.90 19 2337.48 20 2456.39 21 2576.80 22 2696.24 23 2816.08 Name: ResponseTime, dtype: float64
plt.figure(figsize=(10, 6))
plt.scatter(df['Hour'], df['ResponseTime'], label='Actual Response Time')
plt.plot(avg_response_time_by_hour.index, avg_response_time_by_hour.values, color='red', label='Average Response Time')
plt.xlabel('Hour of the Day')
plt.ylabel('Response Time')
plt.title('Comparison of Actual Response Time with Average Response Time by Hour')
plt.legend()
plt.grid(True)
plt.show()
This analysis offers valuable insights into the efficiency of emergency services in various townships, aiding resource allocation and optimization efforts for improving emergency response management.
avg_response_by_twp = df.groupby('twp')['ResponseTime'].mean().reset_index().round(2)
avg_response_by_twp
| twp | ResponseTime | |
|---|---|---|
| 0 | ABINGTON | 1634.26 |
| 1 | AMBLER | 1641.57 |
| 2 | BERKS COUNTY | 1686.44 |
| 3 | BRIDGEPORT | 1643.83 |
| 4 | BRYN ATHYN | 1638.87 |
| ... | ... | ... |
| 63 | WEST NORRITON | 1625.08 |
| 64 | WEST POTTSGROVE | 1723.38 |
| 65 | WHITEMARSH | 1662.63 |
| 66 | WHITPAIN | 1634.60 |
| 67 | WORCESTER | 1618.83 |
68 rows × 2 columns
plt.figure(figsize=(12, 6))
plt.bar(avg_response_by_twp['twp'], avg_response_by_twp['ResponseTime'], color='blue')
plt.title('Average Response Time by Township')
plt.xlabel('Township')
plt.ylabel('Average Response Time (minutes)')
plt.xticks(rotation=90)
plt.grid(True)
plt.show()
from scipy.stats import chi2_contingency
#chi-squared test
contingency_table_response = pd.crosstab(df['twp'], df['ResponseTime'])
chi2_response, p_response, dof_response, expected_response = chi2_contingency(contingency_table_response)
print("Chi-squared:", round(chi2_response, 1))
print("p-value:", round(p_response, 1))
print("Degrees of Freedom:", dof_response)
print("Expected Frequencies:\n", np.round(expected_response, 1))
Chi-squared: 215548.0 p-value: 0.0 Degrees of Freedom: 201536 Expected Frequencies: [[1.1 0.4 0.1 ... 0.1 0.1 0.1] [0.1 0. 0. ... 0. 0. 0. ] [0.1 0. 0. ... 0. 0. 0. ] ... [0.5 0.2 0. ... 0.1 0.1 0. ] [0.4 0.2 0. ... 0. 0. 0. ] [0.1 0.1 0. ... 0. 0. 0. ]]
The data reveals a strong link between township and response time, with a substantial chi-squared statistic and very low p-value highlighting a significant departure from independence. Expected frequencies further contextualize observed counts, offering insight into the robustness of this township-based Response Time Analysis.
Explore trends in 911 calls over time by analyzing call volumes across different hours, days, months, or even years. Time series analysis Can reveal patterns and trends in emergency call volumes and response times. Here are a few time-based analysis:
###Call Volume by Hour of the Day df['Hour'] = df['timeStamp'].dt.hour
call_volume_by_Hour = df.groupby('Hour').size()
call_volume_by_Hour
Hour 0 2112 1 1721 2 1549 3 1435 4 1418 5 1629 6 2513 7 3970 8 5044 9 5314 10 5413 11 5543 12 6029 13 5967 14 5997 15 6154 16 6490 17 6517 18 5762 19 4908 20 4377 21 3788 22 3283 23 2559 dtype: int64
plt.figure(figsize=(10, 6))
call_volume_by_Hour.plot(kind='bar', color='blue')
plt.title('Call Volume by Hour of the Day')
plt.xlabel('Hour of the Day')
plt.ylabel('Call Volume')
plt.grid(True)
plt.xticks(rotation=0)
plt.show()
df = df.merge(call_volume_by_Hour.rename('call_volume_by_Hour'), on='Hour', how='left')
###Call Volume by Day of the Week
call_volume_by_day = df.groupby('Day of Week').size()
call_volume_by_day
Day of Week Fri 14833 Mon 14680 Sat 13336 Sun 12136 Thu 14478 Tue 15150 Wed 14879 dtype: int64
df = df.merge(call_volume_by_day.rename('call_volume_by_day'), how='left', left_on='Day of Week', right_index=True)
plt.figure(figsize=(10, 6))
call_volume_by_day.plot(kind='bar', color='green')
plt.title('Call Volume by Day of Week')
plt.xlabel('Day of Week')
plt.ylabel('Call Volume')
plt.grid(True)
plt.xticks(range(7), rotation=45)
plt.show()
###Call Voulume_Monthly Call Trends
call_volume_by_month = df.groupby('Month').size()
call_volume_by_month
Month 1 13205 2 11467 3 11101 4 11326 5 11423 6 11786 7 12137 8 9078 12 7969 dtype: int64
plt.figure(figsize=(10, 6))
call_volume_by_month.plot(kind='line', marker='o', color='orange')
plt.title('Monthly Call Volume Trends')
plt.xlabel('Month')
plt.ylabel('Call Volume')
plt.grid(True)
plt.xticks(range(1, 13), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.show()
df = df.merge(call_volume_by_month.rename('call_volume_by_month'), how='left', left_on='Month', right_index=True)
###Call Volume_Peak Call Times
call_volume_by_Hour_day = df.groupby(['Day of Week', 'Hour']).size().unstack()
call_volume_by_Hour_day
| Hour | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Day of Week | |||||||||||||||||||||
| Fri | 275 | 235 | 191 | 175 | 201 | 194 | 372 | 598 | 742 | 752 | ... | 932 | 980 | 1039 | 980 | 820 | 696 | 667 | 559 | 514 | 474 |
| Mon | 282 | 221 | 201 | 194 | 204 | 267 | 397 | 653 | 819 | 786 | ... | 869 | 913 | 989 | 997 | 885 | 746 | 613 | 497 | 472 | 325 |
| Sat | 375 | 301 | 263 | 260 | 224 | 231 | 257 | 391 | 459 | 640 | ... | 789 | 796 | 848 | 757 | 778 | 696 | 628 | 572 | 506 | 467 |
| Sun | 383 | 306 | 286 | 268 | 242 | 240 | 300 | 402 | 483 | 620 | ... | 684 | 691 | 663 | 714 | 670 | 655 | 537 | 461 | 415 | 330 |
| Thu | 278 | 202 | 233 | 159 | 182 | 203 | 362 | 570 | 777 | 828 | ... | 876 | 969 | 935 | 1013 | 810 | 698 | 617 | 553 | 424 | 354 |
| Tue | 269 | 240 | 186 | 170 | 209 | 239 | 415 | 655 | 889 | 880 | ... | 943 | 938 | 1026 | 1019 | 905 | 731 | 647 | 571 | 462 | 274 |
| Wed | 250 | 216 | 189 | 209 | 156 | 255 | 410 | 701 | 875 | 808 | ... | 904 | 867 | 990 | 1037 | 894 | 686 | 668 | 575 | 490 | 335 |
7 rows × 24 columns
call_volume_by_hour_day = df.groupby(['Day of Week', 'Hour']).size().reset_index(name='call_volume_by_hour_day')
df = df.merge(call_volume_by_hour_day, how='left', on=['Day of Week', 'Hour'])
###Call Volume by Reasons
call_volume_by_reasons = df.groupby('title').size()
call_volume_by_reasons
title
EMS: ABDOMINAL PAINS 1436
EMS: ACTIVE SHOOTER 2
EMS: ALLERGIC REACTION 438
EMS: ALTERED MENTAL STATUS 1386
EMS: AMPUTATION 14
...
Traffic: HAZARDOUS ROAD CONDITIONS - 1086
Traffic: ROAD OBSTRUCTION - 3144
Traffic: VEHICLE ACCIDENT - 23066
Traffic: VEHICLE FIRE - 553
Traffic: VEHICLE LEAKING FUEL - 77
Length: 110, dtype: int64
call_volume_by_reasons = df.groupby('Reasons').size()
call_volume_by_reasons
Reasons EMS 48877 Fire 14920 Traffic 35695 dtype: int64
plt.figure(figsize=(8, 6))
ax = call_volume_by_reasons.plot(kind='bar', color=['blue', 'red', 'green'])
plt.title('Call Volume by Reason Category')
plt.xlabel('Reason Category')
plt.ylabel('Call Volume')
plt.grid(True)
plt.xticks(rotation=0)
x_labels = call_volume_by_reasons.index
ax.set_xticks(range(len(x_labels)))
ax.set_xticklabels(x_labels, fontsize=10)
plt.tight_layout()
plt.show()
df = df.merge(call_volume_by_day.rename('call_volume_by_reasons'), how='left', left_on='Day of Week', right_index=True)
### Call counts by per reasons[EMS,Fire,Traffic]
df['timeStamp'] = pd.to_datetime(df['timeStamp'])
df['Date'] = pd.to_datetime(df['Date'])
reasons_daily_call_count = df.groupby(['Reasons', 'Date']).size()
reasons_daily_call_count
Reasons Date
EMS 2015-12-10 57
2015-12-11 186
2015-12-12 189
2015-12-13 190
2015-12-14 222
...
Traffic 2016-08-20 117
2016-08-21 138
2016-08-22 156
2016-08-23 151
2016-08-24 42
Length: 777, dtype: int64
reasons_daily_call_count = reasons_daily_call_count.reset_index()
unique_reasons = reasons_daily_call_count['Reasons'].unique()
for reason in unique_reasons:
reason_data = reasons_daily_call_count[reasons_daily_call_count['Reasons'] == reason]
plt.figure(figsize=(10, 6))
plt.plot(reason_data['Date'], reason_data[0], marker='o', label=reason)
plt.xlabel('Date')
plt.ylabel('Call Count')
plt.title(f'Daily Call Count for {reason}')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()
reasons_call_count_pivot = reasons_daily_call_count.pivot(index='Date', columns='Reasons', values=0)
reasons_call_count_pivot.columns = [f'call_counts_{reason}_Day' for reason in reasons_call_count_pivot.columns]
reasons_call_count_pivot
| call_counts_EMS_Day | call_counts_Fire_Day | call_counts_Traffic_Day | |
|---|---|---|---|
| Date | |||
| 2015-12-10 | 57 | 15 | 43 |
| 2015-12-11 | 186 | 69 | 141 |
| 2015-12-12 | 189 | 68 | 146 |
| 2015-12-13 | 190 | 51 | 78 |
| 2015-12-14 | 222 | 39 | 186 |
| ... | ... | ... | ... |
| 2016-08-20 | 159 | 52 | 117 |
| 2016-08-21 | 165 | 54 | 138 |
| 2016-08-22 | 182 | 51 | 156 |
| 2016-08-23 | 218 | 70 | 151 |
| 2016-08-24 | 71 | 19 | 42 |
259 rows × 3 columns
df = df.merge(reasons_call_count_pivot, left_on='Date', right_index=True, how='left')
Analysing peak call times can provide insights into when emergency call volumes are highest during the day or week.
###Peak Call Times by Hour of the Day
peak_hours = call_volume_by_Hour.sort_values(ascending=False).head(3).index
peak_hours
Int64Index([17, 16, 15], dtype='int64', name='Hour')
plt.figure(figsize=(10, 6))
call_volume_by_Hour.plot(kind='bar', color='blue')
plt.title('Peak Call Times by Hour of the Day')
plt.xlabel('Hour of the Day')
plt.ylabel('Call Volume')
plt.grid(True)
plt.xticks(rotation=0)
plt.axvline(x=peak_hours[0], color='red', linestyle='--', label='1st Peak Hour')
plt.axvline(x=peak_hours[1], color='orange', linestyle='--', label='2nd Peak Hour')
plt.axvline(x=peak_hours[2], color='green', linestyle='--', label='3rd Peak Hour')
plt.legend()
plt.show()
###Peak Call Times by Day of the Week
peak_days = call_volume_by_day.sort_values(ascending=False).head(3).index
peak_days
Index(['Tue', 'Wed', 'Fri'], dtype='object', name='Day of Week')
plt.figure(figsize=(10, 6))
call_volume_by_day.plot(kind='bar', color='green')
plt.title('Peak Call Times by Day of the Week')
plt.xlabel('Day of the Week')
plt.ylabel('Call Volume')
plt.grid(True)
plt.xticks(range(7), ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
plt.axvline(x=peak_days[0], color='red', linestyle='--', label='1st Peak Day')
plt.axvline(x=peak_days[1], color='orange', linestyle='--', label='2nd Peak Day')
plt.axvline(x=peak_days[2], color='green', linestyle='--', label='3rd Peak Day')
plt.legend(loc='upper right', bbox_to_anchor=(1.20, 1))
plt.show()
In the context of 911 emergency call data, geographical visualizations can help you understand the distribution of calls across different geographic areas, identify hotspots, and gain insights into emergency call patterns.
### Heatmap of Call Density:
import folium
from folium.plugins import HeatMap, MarkerCluster
import matplotlib.image as mpimg
m_heatmap = folium.Map(location=[40.0, -75.5], zoom_start=10)
heat_data = [[row['lat'], row['lng']] for index, row in df.iterrows()]
HeatMap(heat_data).add_to(m_heatmap)
<folium.plugins.heat_map.HeatMap at 0x2305910c250>
m_heatmap.save('heatmap_map.html')
image_path = r"C:\Users\priya\02_Project_911 Calls\Heat Map.png"
img = mpimg.imread(image_path)
plt.imshow(img)
plt.axis('off')
plt.show()
###Marker Cluster Map
m_marker_cluster = folium.Map(location=[40.0, -75.5], zoom_start=10)
marker_cluster = MarkerCluster().add_to(m_marker_cluster)
for index, row in df.iterrows():
folium.Marker([row['lat'], row['lng']]).add_to(marker_cluster)
m_marker_cluster.save('marker_cluster_map.html')
image_path = r"C:\Users\priya\02_Project_911 Calls\marker_cluster_map.png"
img = mpimg.imread(image_path)
plt.imshow(img)
plt.axis('off')
plt.show()
###Comparison of Areas based on Call Volumes
call_count_by_area = df['twp'].value_counts().reset_index()
call_count_by_area.columns = ['twp', 'call_count']
call_count_by_area
| twp | call_count | |
|---|---|---|
| 0 | LOWER MERION | 8443 |
| 1 | ABINGTON | 5977 |
| 2 | NORRISTOWN | 5890 |
| 3 | UPPER MERION | 5227 |
| 4 | CHELTENHAM | 4575 |
| ... | ... | ... |
| 63 | BRYN ATHYN | 173 |
| 64 | SALFORD | 163 |
| 65 | GREEN LANE | 51 |
| 66 | PHILA COUNTY | 43 |
| 67 | LEHIGH COUNTY | 30 |
68 rows × 2 columns
plt.figure(figsize=(12, 6))
sns.barplot(x='twp', y='call_count', data=call_count_by_area)
plt.xticks(rotation=90)
plt.xlabel('Township')
plt.ylabel('Call Count')
plt.title('Comparison of Areas based on Call Volumes')
plt.tight_layout()
plt.grid()
plt.show()
call_count_by_area_sorted = call_count_by_area.sort_values(by='call_count', ascending=True)
plt.figure(figsize=(12, 6))
sns.barplot(x='twp', y='call_count', data=call_count_by_area_sorted)
plt.xticks(rotation=90)
plt.xlabel('Township')
plt.ylabel('Call Count')
plt.title('Comparison of Areas based on Call Volumes')
plt.tight_layout()
plt.show()
# Comparison of Areas based on Reason for Calls
call_count_by_reason_area = df.groupby(['twp', 'Reasons']).size().reset_index(name='Call Count')
call_count_by_reason_area
| twp | Reasons | Call Count | |
|---|---|---|---|
| 0 | ABINGTON | EMS | 3043 |
| 1 | ABINGTON | Fire | 940 |
| 2 | ABINGTON | Traffic | 1994 |
| 3 | AMBLER | EMS | 330 |
| 4 | AMBLER | Fire | 131 |
| ... | ... | ... | ... |
| 199 | WHITPAIN | Fire | 331 |
| 200 | WHITPAIN | Traffic | 768 |
| 201 | WORCESTER | EMS | 335 |
| 202 | WORCESTER | Fire | 148 |
| 203 | WORCESTER | Traffic | 292 |
204 rows × 3 columns
plt.figure(figsize=(12, 6))
sns.countplot(x='twp', data=df, hue='Reasons')
plt.xticks(rotation=90)
plt.xlabel('Township')
plt.ylabel('Call Count')
plt.title('Comparison of Townships based on Reasons for Calls')
plt.legend(title='Reason')
plt.show()
In the context of emergency call data, we can predict certain outcomes, such as call volume, response times, or the likelihood of a certain type of emergency call occurring.
Using Linear Regression to Predict Call Volume Based on the Hour of the Day
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_error
X_hour = df[['Hour']]
y_hour = df['call_volume_by_Hour']
X_train_hour, X_test_hour, y_train_hour, y_test_hour = train_test_split(X_hour, y_hour, test_size=0.2, random_state=42)
model_hour = LinearRegression()
model_hour.fit(X_train_hour, y_train_hour)
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
y_pred_hour = model_hour.predict(X_test_hour).astype(int)
mse_hour = mean_squared_error(y_test_hour, y_pred_hour)
r2_hour = r2_score(y_test_hour, y_pred_hour)
print("Mean Squared Error:", mse_hour)
print("R-squared:", r2_hour)
print("Actual Call Volumes:", y_test_hour.values)
print("Predicted Call Volumes:", y_pred_hour)
Mean Squared Error: 1916453.117794864 R-squared: 0.14969747035355263 Actual Call Volumes: [5967 5997 5967 ... 4908 5044 1435] Predicted Call Volumes: [4924 5027 4924 ... 5539 4411 3898]
The model's predicted call volumes closely match the actual call volumes, indicating effective hourly volume estimation
residuals_hour = (y_test_hour - y_pred_hour).round().astype(int)
residuals_hour
46085 1043
80582 970
43550 1043
83155 633
66628 824
...
18682 -1265
428 325
80318 -631
45271 633
90835 -2463
Name: call_volume_by_Hour, Length: 19899, dtype: int32
percent_error_hour = (((y_test_hour - y_pred_hour) / y_test_hour) * 100).round().astype(int)
percent_error_hour
46085 17
80582 16
43550 17
83155 13
66628 15
...
18682 -29
428 6
80318 -13
45271 13
90835 -172
Name: call_volume_by_Hour, Length: 19899, dtype: int32
# Predict call volume for all hours
hours = df['Hour'].unique()
hours_df = pd.DataFrame({'Hour': hours})
predicted_call_volume = model_hour.predict(hours_df).round().astype(int)
predicted_df = pd.DataFrame({'Hour': hours_df['Hour'], 'Predicted Call Volume': predicted_call_volume})
predicted_df
| Hour | Predicted Call Volume | |
|---|---|---|
| 0 | 17 | 5335 |
| 1 | 18 | 5437 |
| 2 | 19 | 5540 |
| 3 | 20 | 5643 |
| 4 | 21 | 5745 |
| 5 | 22 | 5848 |
| 6 | 23 | 5950 |
| 7 | 0 | 3591 |
| 8 | 1 | 3694 |
| 9 | 2 | 3796 |
| 10 | 3 | 3899 |
| 11 | 4 | 4001 |
| 12 | 5 | 4104 |
| 13 | 6 | 4207 |
| 14 | 7 | 4309 |
| 15 | 8 | 4412 |
| 16 | 9 | 4514 |
| 17 | 10 | 4617 |
| 18 | 11 | 4719 |
| 19 | 12 | 4822 |
| 20 | 13 | 4925 |
| 21 | 14 | 5027 |
| 22 | 15 | 5130 |
| 23 | 16 | 5232 |
# predict the Peak Call Hours and Off-Peak Call Hours:
peak_hour_idx = np.argmax(y_pred_hour)
peak_hour = X_hour.iloc[peak_hour_idx]['Hour']
off_peak_hour_idx = np.argmin(y_pred_hour)
off_peak_hour = X_hour.iloc[off_peak_hour_idx]['Hour']
print("Peak Call Hour:", peak_hour)
print("Off-Peak Call Hour:", off_peak_hour)
Peak Call Hour: 8 Off-Peak Call Hour: 17
Peak Call Hours and Off-Peak Call Hours: Model's prediction: the hour of the day that corresponds to the highest predicted call volume is 8 AM. model's predictions: the hour of the day that corresponds to the lowest predicted call volume is 5 PM.
indices_hour = np.arange(len(y_test_hour))
fig, axes = plt.subplots(3, 2, figsize=(15, 18))
# Plot 1: Actual vs. Predicted Call Volumes for Test Set (Scatter plot)
axes[0, 0].scatter(y_test_hour, y_pred_hour, color='blue', alpha=0.5)
axes[0, 0].plot(y_test_hour, y_test_hour, color='red', linestyle='--')
axes[0, 0].set_title('Actual vs. Predicted Call Volumes')
axes[0, 0].set_xlabel('Actual Call Volume')
axes[0, 0].set_ylabel('Predicted Call Volume')
axes[0, 0].legend(['Perfect Prediction', 'Data Points'])
axes[0, 0].grid(True)
# Plot 2: Residuals vs. Predicted Call Volumes (Scatter plot)
axes[0, 1].scatter(y_pred_hour, residuals_hour, color='red', alpha=0.5)
axes[0, 1].axhline(y=0, color='black', linewidth=0.8, linestyle='--')
axes[0, 1].set_title('Residuals vs. Predicted Call Volumes')
axes[0, 1].set_xlabel('Predicted Call Volume')
axes[0, 1].set_ylabel('Residuals')
axes[0, 1].grid(True)
# Plot 3: Peak and Off-Peak Call Volumes (Bar plot)
axes[1, 0].bar(['Peak Hour', 'Off-Peak Hour'], [y_pred_hour[peak_hour_idx], y_pred_hour[off_peak_hour_idx]], color=['orange', 'green'])
axes[1, 0].set_title('Peak and Off-Peak Call Volumes')
axes[1, 0].set_ylabel('Predicted Call Volume')
# Plot 4: Predicted Call Volume for All Hours (Line plot with Peak and Off-Peak Hours)
axes[1, 1].plot(hours, predicted_call_volume, color='red', label='Predicted Call Volume')
axes[1, 1].axvline(x=peak_hour, color='orange', linestyle='--', label='Peak Hour')
axes[1, 1].axvline(x=off_peak_hour, color='green', linestyle='--', label='Off-Peak Hour')
axes[1, 1].set_title('Predicted Call Volume for All Hours')
axes[1, 1].set_xlabel('Hour')
axes[1, 1].set_ylabel('Predicted Call Volume')
axes[1, 1].legend()
axes[1, 1].grid(True)
# Plot 5: Histogram of Residuals
axes[2, 0].hist(residuals_hour, bins=20, color='purple', alpha=0.7)
axes[2, 0].set_title('Histogram of Residuals')
axes[2, 0].set_xlabel('Residuals')
axes[2, 0].set_ylabel('Frequency')
fig.delaxes(axes[2, 1])
plt.tight_layout()
plt.show()
#histograms of actual and predicted call volumes
plt.figure(figsize=(8, 6))
plt.hist(y_test_hour, bins=20, label='Actual Call Volume', color='blue', alpha=0.5)
plt.hist(y_pred_hour, bins=20, label='Predicted Call Volume', color='red', alpha=0.5)
plt.xlabel('Call Volume')
plt.ylabel('Frequency')
plt.title('Distribution of Actual vs. Predicted Call Volumes')
plt.legend()
plt.grid(True)
plt.show()
A distribution plot (histogram) suggests that the model's predictions are aligned well with the actual data.
# The correlation between actual and predicted call volumes
correlation = np.corrcoef(y_test_hour, y_pred_hour)[0, 1]
rounded_correlation = round(correlation, 3)
print("Correlation between Actual and Predicted Call Volumes:", rounded_correlation)
Correlation between Actual and Predicted Call Volumes: 0.387
# heatmap of the correlation matrix
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=.5)
plt.title('Correlation Heatmap: Actual vs. Predicted Call Volumes')
plt.show()
#Predicted Call Volume for Next 24 Hours (if 0 =12 AM & 23 =11 PM)
new_hours = pd.DataFrame({'Hour': range(0, 24)})
predicted_call_volume_new = model_hour.predict(new_hours).round().astype(int)
predicted_df = pd.DataFrame({'Hour': range(0, 24), 'Predicted Call Volume': predicted_call_volume_new})
predicted_df
| Hour | Predicted Call Volume | |
|---|---|---|
| 0 | 0 | 3591 |
| 1 | 1 | 3694 |
| 2 | 2 | 3796 |
| 3 | 3 | 3899 |
| 4 | 4 | 4001 |
| 5 | 5 | 4104 |
| 6 | 6 | 4207 |
| 7 | 7 | 4309 |
| 8 | 8 | 4412 |
| 9 | 9 | 4514 |
| 10 | 10 | 4617 |
| 11 | 11 | 4719 |
| 12 | 12 | 4822 |
| 13 | 13 | 4925 |
| 14 | 14 | 5027 |
| 15 | 15 | 5130 |
| 16 | 16 | 5232 |
| 17 | 17 | 5335 |
| 18 | 18 | 5437 |
| 19 | 19 | 5540 |
| 20 | 20 | 5643 |
| 21 | 21 | 5745 |
| 22 | 22 | 5848 |
| 23 | 23 | 5950 |
plt.figure(figsize=(10, 6))
plt.bar(new_hours['Hour'], predicted_call_volume_new, color='blue')
plt.xlabel('Hour of the Day')
plt.ylabel('Predicted Call Volume')
plt.title('Predicted Call Volume for Next 24 Hours')
plt.xticks(range(0, 24)) # Set x-axis ticks to match the hours
plt.grid(True)
plt.show()
X_reasons = df[['call_counts_EMS_Day', 'call_counts_Fire_Day', 'call_counts_Traffic_Day']]
y_reasons = df['call_volume_by_reasons']
X_train_reasons, X_test_reasons, y_train_reasons, y_test_reasons = train_test_split(X_reasons, y_reasons, test_size=0.2, random_state=42)
model_reasons = LinearRegression()
model_reasons.fit(X_train_reasons, y_train_reasons)
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
y_pred_reasons = model_reasons.predict(X_test_reasons)
mse_reasons = mean_squared_error(y_test_reasons, y_pred_reasons)
r2_reasons = r2_score(y_test_reasons, y_pred_reasons)
mse_reasons
730006.7178424102
r2_reasons
0.21406761818651243
mae_reasons = mean_absolute_error(y_test_reasons, y_pred_reasons)
mae_reasons
672.5615355957387
residuals = y_test_reasons - y_pred_reasons
residuals
46085 423.044377
80582 360.396194
43550 441.557950
83155 -184.511407
66628 82.597804
...
18682 -557.441227
428 817.651102
80318 450.572313
45271 301.069717
90835 614.292669
Name: call_volume_by_reasons, Length: 19899, dtype: float64
percent_error = ((y_test_reasons - y_pred_reasons) / y_test_reasons) * 100
percent_error
46085 2.843231
80582 2.489268
43550 3.049855
83155 -1.274426
66628 0.555130
...
18682 -3.797284
428 5.512378
80318 3.028243
45271 2.050884
90835 4.054737
Name: call_volume_by_reasons, Length: 19899, dtype: float64
print("Actual Call Volumes by Reasons:", y_test_reasons.values)
print("Predicted Call Volumes by Reasons:", y_pred_reasons.round().astype(int))
Actual Call Volumes by Reasons: [14879 14478 14478 ... 14879 14680 15150] Predicted Call Volumes by Reasons: [14456 14118 14036 ... 14428 14379 14536]
coefficients = pd.Series(model_reasons.coef_, index=X_reasons.columns)
coefficients
call_counts_EMS_Day 13.749732 call_counts_Fire_Day -17.576344 call_counts_Traffic_Day 7.172358 dtype: float64
plt.figure(figsize=(10, 6))
coefficients.sort_values().plot(kind='barh')
plt.xlabel('Coefficient')
plt.ylabel('Reasons')
plt.title('Coefficients for Predicting Call Volume by Reasons')
plt.tight_layout()
plt.show()
feature_columns = ['call_counts_EMS_Day', 'call_counts_Fire_Day', 'call_counts_Traffic_Day']
predicted_call_counts = model_reasons.predict(reasons_call_count_pivot[feature_columns])
for i, reason in enumerate(['EMS', 'Fire', 'Traffic']):
reasons_call_count_pivot[f'Predicted_Call_Count_{reason}'] = predicted_call_counts[i]
for column in reasons_call_count_pivot.columns:
reasons_call_count_pivot[column] = reasons_call_count_pivot[column].round(2)
reasons_call_count_pivot
| call_counts_EMS_Day | call_counts_Fire_Day | call_counts_Traffic_Day | Predicted_Call_Count_EMS | Predicted_Call_Count_Fire | Predicted_Call_Count_Traffic | |
|---|---|---|---|---|---|---|
| Date | ||||||
| 2015-12-10 | 57 | 15 | 43 | 12487.86 | 14015.35 | 14110.04 |
| 2015-12-11 | 186 | 69 | 141 | 12487.86 | 14015.35 | 14110.04 |
| 2015-12-12 | 189 | 68 | 146 | 12487.86 | 14015.35 | 14110.04 |
| 2015-12-13 | 190 | 51 | 78 | 12487.86 | 14015.35 | 14110.04 |
| 2015-12-14 | 222 | 39 | 186 | 12487.86 | 14015.35 | 14110.04 |
| ... | ... | ... | ... | ... | ... | ... |
| 2016-08-20 | 159 | 52 | 117 | 12487.86 | 14015.35 | 14110.04 |
| 2016-08-21 | 165 | 54 | 138 | 12487.86 | 14015.35 | 14110.04 |
| 2016-08-22 | 182 | 51 | 156 | 12487.86 | 14015.35 | 14110.04 |
| 2016-08-23 | 218 | 70 | 151 | 12487.86 | 14015.35 | 14110.04 |
| 2016-08-24 | 71 | 19 | 42 | 12487.86 | 14015.35 | 14110.04 |
259 rows × 6 columns
actual_columns = ['call_counts_EMS_Day', 'call_counts_Fire_Day', 'call_counts_Traffic_Day']
predicted_columns = ['Predicted_Call_Count_EMS', 'Predicted_Call_Count_Fire', 'Predicted_Call_Count_Traffic']
reasons = ['EMS', 'Fire', 'Traffic']
indices = np.arange(len(y_test_reasons))
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
# 1. Line plot comparing actual and predicted call volumes
axes[0, 0].plot(indices, y_test_reasons, label='Actual Call Volumes', marker='o')
axes[0, 0].plot(indices, y_pred_reasons, label='Predicted Call Volumes', marker='x')
axes[0, 0].set_xlabel('Test Instance')
axes[0, 0].set_ylabel('Call Volumes')
axes[0, 0].set_title('Actual vs. Predicted Call Volumes')
axes[0, 0].legend()
axes[0, 0].set_xticks(indices)
# 2. Scatter plot comparing actual call volumes with predicted call volumes
axes[0, 1].scatter(y_test_reasons, y_pred_reasons)
axes[0, 1].set_xlabel('Actual Call Volumes')
axes[0, 1].set_ylabel('Predicted Call Volumes')
axes[0, 1].set_title('Actual vs. Predicted Call Volumes')
# 3. Residual plot
axes[1, 0].scatter(y_pred_reasons, residuals)
axes[1, 0].axhline(y=0, color='r', linestyle='--')
axes[1, 0].set_xlabel('Predicted Call Volumes')
axes[1, 0].set_ylabel('Residuals')
axes[1, 0].set_title('Residual Plot')
# 4. Histogram of residuals
axes[1, 1].hist(residuals, bins=20)
axes[1, 1].set_xlabel('Residuals')
axes[1, 1].set_ylabel('Frequency')
axes[1, 1].set_title('Histogram of Residuals')
plt.tight_layout()
plt.show()
This analysis employs logistic regression to predict changes in emergency call volumes. Utilizing historical call data, time features, and geographical information. The goal is to anticipate 911 Call volume increases or decreases in the upcoming day or week. (Features Utilized:Daily call counts (EMS, Fire, Traffic);Hourly, daily, and monthly call patterns;Day of the week, month, year;Time of day;Geographical coordinates (latitude, longitude);Responses data.)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
numeric_columns
Index(['lat', 'lng', 'zip', 'e', 'Hour', 'Month', 'Minute', 'Second',
'ResponseTime', 'call_volume_by_Hour', 'call_volume_by_day',
'call_volume_by_month', 'call_volume_by_hour_day',
'call_volume_by_reasons', 'call_counts_EMS_Day', 'call_counts_Fire_Day',
'call_counts_Traffic_Day'],
dtype='object')
X_features = ['lat', 'lng', 'zip','Hour', 'Month', 'ResponseTime', 'call_volume_by_Hour', 'call_volume_by_day',
'call_volume_by_month', 'call_volume_by_hour_day', 'call_volume_by_reasons']
df['Emergency_Call_Increase'] = (df['call_volume_by_day'] > df['call_volume_by_day'].shift(1)).astype(int)
df = df.dropna(subset=['Emergency_Call_Increase'])
X = df[X_features]
y = df['Emergency_Call_Increase']
imputer = SimpleImputer(strategy='constant', fill_value=0)
X_filled = imputer.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_filled, y, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=5000, solver='saga', fit_intercept=True)
model.fit(X_train, y_train)
LogisticRegression(max_iter=5000, solver='saga')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(max_iter=5000, solver='saga')
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)
Model Accuracy: 0.9987939092416704
new_data = pd.DataFrame({
'lat': [40.297876],
'lng': [-75.581294],
'zip': [19525.0],
'Hour': [17],
'Month': [12],
'ResponseTime': [8],
'call_volume_by_Hour': [500],
'call_volume_by_day': [12000],
'call_volume_by_month': [8000],
'call_volume_by_hour_day': [800],
'call_volume_by_reasons': [3000]})
new_data_filled = imputer.transform(new_data)
scaler = StandardScaler()
new_data_normalized = scaler.fit_transform(new_data_filled)
new_predictions = model.predict(new_data_normalized)
prediction_result = "increase" if new_predictions[0] == 1 else "decrease"
print(f"Prediction for the upcoming day: Emergency call volume will {prediction_result}")
Prediction for the upcoming day: Emergency call volume will decrease
fig, ax = plt.subplots()
ax.plot(new_data.columns[3:], new_data.iloc[0, 3:], marker='o', color='blue', label='Actual Call Volume')
predicted_changes = new_predictions - 0.5
predicted_call_volume = new_data.iloc[0, 3:] + predicted_changes * new_data.iloc[0, 3:]
ax.plot(new_data.columns[3:], predicted_call_volume, marker='x', color='red', linestyle='--', label='Predicted Call Volume Change')
ax.set_xlabel('Features')
ax.set_ylabel('Call Volume')
ax.set_title('Actual vs. Predicted Call Volume Change for the Upcoming Day')
ax.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
print(df.columns.tolist())
['lat', 'lng', 'desc', 'zip', 'title', 'timeStamp', 'twp', 'addr', 'e', 'Reasons', 'Date', 'Hour', 'Month', 'Day of Week', 'Minute', 'Second', 'ResponseTime', 'call_volume_by_Hour', 'call_volume_by_day', 'call_volume_by_month', 'call_volume_by_hour_day', 'call_volume_by_reasons', 'call_counts_EMS_Day', 'call_counts_Fire_Day', 'call_counts_Traffic_Day', 'Emergency_Call_Increase']
##1. Correlation between response time and other variables
selected_columns = ['lat', 'lng', 'zip','call_volume_by_Hour',
'call_volume_by_day', 'call_volume_by_month', 'call_volume_by_hour_day','ResponseTime',
'call_volume_by_reasons', 'call_counts_EMS_Day',
'call_counts_Fire_Day', 'call_counts_Traffic_Day']
selected_data = df[selected_columns]
response_time_correlations = df[selected_columns].corr()['ResponseTime'].drop('ResponseTime')
print(response_time_correlations)
lat -0.003078 lng -0.007033 zip 0.007129 call_volume_by_Hour 0.388279 call_volume_by_day 0.019112 call_volume_by_month -0.008003 call_volume_by_hour_day 0.354125 call_volume_by_reasons 0.019112 call_counts_EMS_Day 0.001282 call_counts_Fire_Day 0.007174 call_counts_Traffic_Day 0.014966 Name: ResponseTime, dtype: float64
Response time is positively correlated with call volume by hour and call volume by hour-day, indicating that higher call volumes during specific hours and hours of the day are associated with slightly longer response times. Additionally, there's a positive correlation between response time and call volume by day, traffic-related call counts, and fire-related call counts, albeit to a lesser extent. Geographical factors such as latitude and longitude have very weak correlations with response time, suggesting minimal linear influence.
##Heat Map
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for ResponseTime and Other Variables')
plt.show()
# 2. Correlation between Call volume by day and other variables
call_volume_by_day_correlations = df[selected_columns].corr()['call_volume_by_day'].drop('call_volume_by_day')
call_volume_by_day_correlations
lat -0.014409 lng 0.028753 zip -0.006421 call_volume_by_Hour 0.056553 call_volume_by_month -0.040685 call_volume_by_hour_day 0.292287 ResponseTime 0.019112 call_volume_by_reasons 1.000000 call_counts_EMS_Day 0.313805 call_counts_Fire_Day -0.039610 call_counts_Traffic_Day 0.245589 Name: call_volume_by_day, dtype: float64
The daily call volume is moderately positively correlated with call volume by hour and call counts for EMS and traffic-related calls, and moderately negatively correlated with call volume by month and fire-related call counts, while showing a strong positive correlation with call volume by reasons. Geographical factors such as latitude and longitude have weak correlations with daily call volume.
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap for call_volume_by_day and Other Variables')
plt.show()
## 3. Correlation between Call Volume Metrics
call_volume_metrics = ['call_volume_by_Hour', 'call_volume_by_day', 'call_volume_by_month', 'call_volume_by_hour_day']
call_volume_data = df[call_volume_metrics]
correlation_matrix = call_volume_data.corr()
correlation_matrix
| call_volume_by_Hour | call_volume_by_day | call_volume_by_month | call_volume_by_hour_day | |
|---|---|---|---|---|
| call_volume_by_Hour | 1.000000 | 0.056553 | -0.009962 | 0.938172 |
| call_volume_by_day | 0.056553 | 1.000000 | -0.040685 | 0.292287 |
| call_volume_by_month | -0.009962 | -0.040685 | 1.000000 | -0.021217 |
| call_volume_by_hour_day | 0.938172 | 0.292287 | -0.021217 | 1.000000 |
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap: Call Volume Metrics')
plt.show()
The heatmap shows that higher call volumes during specific hours are positively correlated(0.056553)with higher daily call volumes and call volumes during certain days, while there is a very weak negative correlation (-0.009962)between call volumes during specific hours and call volumes by month. Additionally, call volumes during certain hours and call volumes during certain days are strongly positively correlated(0.938172).A weak negative correlation (-0.040685)between daily call volumes and call volumes by month.
# 4. Correlation between Call Counts and Call Volumes
call_count_variables = ['call_counts_EMS_Day', 'call_counts_Fire_Day', 'call_counts_Traffic_Day']
call_count_data = df[call_count_variables + call_volume_metrics]
correlation_matrix= call_count_data.corr()
correlation_matrix
| call_counts_EMS_Day | call_counts_Fire_Day | call_counts_Traffic_Day | call_volume_by_Hour | call_volume_by_day | call_volume_by_month | call_volume_by_hour_day | |
|---|---|---|---|---|---|---|---|
| call_counts_EMS_Day | 1.000000 | 0.242651 | 0.279677 | 0.023428 | 0.313805 | 0.149365 | 0.096936 |
| call_counts_Fire_Day | 0.242651 | 1.000000 | 0.678890 | -0.015422 | -0.039610 | 0.136494 | -0.027403 |
| call_counts_Traffic_Day | 0.279677 | 0.678890 | 1.000000 | 0.005892 | 0.245589 | 0.183948 | 0.060560 |
| call_volume_by_Hour | 0.023428 | -0.015422 | 0.005892 | 1.000000 | 0.056553 | -0.009962 | 0.938172 |
| call_volume_by_day | 0.313805 | -0.039610 | 0.245589 | 0.056553 | 1.000000 | -0.040685 | 0.292287 |
| call_volume_by_month | 0.149365 | 0.136494 | 0.183948 | -0.009962 | -0.040685 | 1.000000 | -0.021217 |
| call_volume_by_hour_day | 0.096936 | -0.027403 | 0.060560 | 0.938172 | 0.292287 | -0.021217 | 1.000000 |
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap: Call Counts vs. Call Volumes')
plt.show()
The correlation matrix reveals a positive correlation between 'call_counts_EMS_Day'(0.313805), call_counts_Traffic_Day'(0.245589) with 'call_volume_by_day', suggesting A higher EMS and traffic related call counts aligning with increased daily call volumes. there is also Negligible relationship (-0.015422) between 'call_volume_by_Hour' and 'call_counts_Fire_Day'also a Weak negative correlation (-0.039610) between 'call_counts_Fire_Day' and 'call_volume_by_day', indicating days with more fire-related calls might slightly lower daily call volumes. and a strong connection between fire-related call counts and traffic-related call counts (0.678890).
# 5. Correlation between Call Volumes and Call Reasons
call_volume_metrics = ['call_volume_by_Hour', 'call_volume_by_day', 'call_volume_by_month', 'call_volume_by_hour_day']
call_volume_reasons_variable = 'call_volume_by_reasons'
selected_columns = call_volume_metrics + [call_volume_reasons_variable]
selected_data = df[selected_columns]
correlation_matrix = selected_data.corr()
call_volume_by_reasons_correlations = correlation_matrix[call_volume_reasons_variable].drop(call_volume_reasons_variable)
call_volume_by_reasons_correlations
call_volume_by_Hour 0.056553 call_volume_by_day 1.000000 call_volume_by_month -0.040685 call_volume_by_hour_day 0.292287 Name: call_volume_by_reasons, dtype: float64
The correlation values indicate that there is a positive correlation between the 'call_volume_by_reasons' and 'call_volume_by_day' (0.056553), a weak negative correlation with 'call_volume_by_month' (-0.040685), and a positive correlation with 'call_volume_by_hour_day' (0.292287). The positive correlations indicate that an increase in the volume of calls categorized by reasons is generally associated with slight increases in other call volume metrics, while the negative correlation with the monthly call volume suggests a minor decrease. However, all these correlations are relatively weak, which means that the relationships are not strongly linear. Other factors could contribute to the patterns observed in the data
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap')
plt.show()
df
| lat | lng | desc | zip | title | timeStamp | twp | addr | e | Reasons | ... | ResponseTime | call_volume_by_Hour | call_volume_by_day | call_volume_by_month | call_volume_by_hour_day | call_volume_by_reasons | call_counts_EMS_Day | call_counts_Fire_Day | call_counts_Traffic_Day | Emergency_Call_Increase | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 40.297876 | -75.581294 | REINDEER CT & DEAD END; NEW HANOVER; Station ... | 19525.0 | EMS: BACK PAINS/INJURY | 2015-12-10 17:40:00 | NEW HANOVER | REINDEER CT & DEAD END | 1 | EMS | ... | 2120.000000 | 6517 | 14478 | 7969 | 1013 | 14478 | 57 | 15 | 43 | 0 |
| 1 | 40.258061 | -75.264680 | BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP... | 19446.0 | EMS: DIABETIC EMERGENCY | 2015-12-10 17:40:00 | HATFIELD TOWNSHIP | BRIAR PATH & WHITEMARSH LN | 1 | EMS | ... | 2120.000000 | 6517 | 14478 | 7969 | 1013 | 14478 | 57 | 15 | 43 | 0 |
| 2 | 40.121182 | -75.351975 | HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St... | 19401.0 | Fire: GAS-ODOR/LEAK | 2015-12-10 17:40:00 | NORRISTOWN | HAWS AVE | 1 | Fire | ... | 2120.000000 | 6517 | 14478 | 7969 | 1013 | 14478 | 57 | 15 | 43 | 0 |
| 3 | 40.116153 | -75.343513 | AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;... | 19401.0 | EMS: CARDIAC EMERGENCY | 2015-12-10 17:40:01 | NORRISTOWN | AIRY ST & SWEDE ST | 1 | EMS | ... | 2120.033333 | 6517 | 14478 | 7969 | 1013 | 14478 | 57 | 15 | 43 | 0 |
| 4 | 40.251492 | -75.603350 | CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S... | NaN | EMS: DIZZINESS | 2015-12-10 17:40:01 | LOWER POTTSGROVE | CHERRYWOOD CT & DEAD END | 1 | EMS | ... | 2120.033333 | 6517 | 14478 | 7969 | 1013 | 14478 | 57 | 15 | 43 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99487 | 40.132869 | -75.333515 | MARKLEY ST & W LOGAN ST; NORRISTOWN; 2016-08-2... | 19401.0 | Traffic: VEHICLE ACCIDENT - | 2016-08-24 11:06:00 | NORRISTOWN | MARKLEY ST & W LOGAN ST | 1 | Traffic | ... | 1332.000000 | 5543 | 14879 | 9078 | 789 | 14879 | 71 | 19 | 42 | 0 |
| 99488 | 40.006974 | -75.289080 | LANCASTER AVE & RITTENHOUSE PL; LOWER MERION; ... | 19003.0 | Traffic: VEHICLE ACCIDENT - | 2016-08-24 11:07:02 | LOWER MERION | LANCASTER AVE & RITTENHOUSE PL | 1 | Traffic | ... | 1334.066667 | 5543 | 14879 | 9078 | 789 | 14879 | 71 | 19 | 42 | 0 |
| 99489 | 40.115429 | -75.334679 | CHESTNUT ST & WALNUT ST; NORRISTOWN; Station ... | 19401.0 | EMS: FALL VICTIM | 2016-08-24 11:12:00 | NORRISTOWN | CHESTNUT ST & WALNUT ST | 1 | EMS | ... | 1344.000000 | 5543 | 14879 | 9078 | 789 | 14879 | 71 | 19 | 42 | 0 |
| 99490 | 40.186431 | -75.192555 | WELSH RD & WEBSTER LN; HORSHAM; Station 352; ... | 19002.0 | EMS: NAUSEA/VOMITING | 2016-08-24 11:17:01 | HORSHAM | WELSH RD & WEBSTER LN | 1 | EMS | ... | 1354.033333 | 5543 | 14879 | 9078 | 789 | 14879 | 71 | 19 | 42 | 0 |
| 99491 | 40.207055 | -75.317952 | MORRIS RD & S BROAD ST; UPPER GWYNEDD; 2016-08... | 19446.0 | Traffic: VEHICLE ACCIDENT - | 2016-08-24 11:17:02 | UPPER GWYNEDD | MORRIS RD & S BROAD ST | 1 | Traffic | ... | 1354.066667 | 5543 | 14879 | 9078 | 789 | 14879 | 71 | 19 | 42 | 0 |
99492 rows × 26 columns